
# Packages Scrapy searches for spiders, written as "<project>.<spiders package>".
SPIDER_MODULES = ['product_spider.spiders']
# Package in which newly generated spiders are created.
NEWSPIDER_MODULE = 'product_spider.spiders'
# Send both HTTP and HTTPS downloads through the scrapy-playwright download
# handler (this is a download handler, not a downloader middleware).
DOWNLOAD_HANDLERS = dict.fromkeys(
    ("http", "https"),
    "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
)

# Use the asyncio-based Twisted reactor.
# NOTE(review): scrapy-playwright is documented to require this reactor - confirm
# against the installed version.
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"

# Chrome DevTools Protocol endpoint on the local machine, port 9222.
# NOTE(review): presumably scrapy-playwright attaches to a browser already
# listening here instead of launching its own - confirm.
PLAYWRIGHT_CDP_URL = "http://127.0.0.1:9222"

# Playwright browser configuration.
PLAYWRIGHT_BROWSER_TYPE = "chromium"

# Options used when launching the browser.
# NOTE(review): PLAYWRIGHT_CDP_URL is also set in this file; when connecting over
# CDP these launch options may be ignored by scrapy-playwright - confirm.
PLAYWRIGHT_LAUNCH_OPTIONS = dict(
    headless=False,  # run with a visible browser window
    timeout=30_000,  # 30 seconds, expressed in milliseconds
    args=[
        "--disable-blink-features=AutomationControlled",
        (
            "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/114.0.0.0 Safari/537.36"
        ),
    ],
)

# Number of concurrent requests.
CONCURRENT_REQUESTS = 8

# Automatic throttling (AutoThrottle) configuration.
AUTOTHROTTLE_ENABLED = True
AUTOTHROTTLE_TARGET_CONCURRENCY = 4.0  # target number of parallel requests
AUTOTHROTTLE_START_DELAY = 5  # starting delay
AUTOTHROTTLE_MAX_DELAY = 60  # upper limit for the delay

# Retry policy.
RETRY_ENABLED = True
RETRY_TIMES = 3
# Status codes that trigger a retry: server-side errors first, then the
# client-side statuses this project also chooses to retry.
RETRY_HTTP_CODES = [500, 502, 503, 504] + [400, 403, 404, 408]

# Downloader middlewares. A value of None switches the entry off; an integer
# is the middleware's order value.
DOWNLOADER_MIDDLEWARES = {
    # The stock User-Agent middleware is disabled; the random user-agent
    # middleware is registered in its place.
    "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": None,
    "scrapy_user_agents.middlewares.RandomUserAgentMiddleware": 400,
    "scrapy.downloadermiddlewares.retry.RetryMiddleware": 500,
}

# Item pipelines: the only registered pipeline is SaveToTxtPipeline, with
# order value 300.
ITEM_PIPELINES = {"product_spider.pipelines.SaveToTxtPipeline": 300}

# Log level. Uncomment the line below to set it to DEBUG.
# LOG_LEVEL = 'DEBUG'
